Final Project

John Beall

November 27, 2022

# Load tidyverse for manipulating data
# Load ggplot2 and plotly for graphing data
# Spell out FALSE: the shorthand `F` is an ordinary variable that can be
# reassigned, which would silently flip this argument.
library(tidyverse, warn.conflicts = FALSE)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Plotting packages. FALSE is written in full because `F` is a reassignable
# variable rather than a reserved word.
library(ggplot2, warn.conflicts = FALSE)
library(plotly, warn.conflicts = FALSE)
## Warning: package 'plotly' was built under R version 4.2.2
# Import the generation data set from the local archive folder as a tibble.
energy <- read_csv(file = "archive/organised_Gen.csv")
## New names:
## Rows: 496774 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): STATE, TYPE OF PRODUCER, ENERGY SOURCE dbl (4): ...1, YEAR, MONTH,
## GENERATION (Megawatthours)
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Print a compact overview (column names, types, first values) of the import.
glimpse(energy)
## Rows: 496,774
## Columns: 7
## $ ...1                         <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,…
## $ YEAR                         <dbl> 2001, 2001, 2001, 2001, 2001, 2001, 2001,…
## $ MONTH                        <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ STATE                        <chr> "AK", "AK", "AK", "AK", "AK", "AK", "AK",…
## $ `TYPE OF PRODUCER`           <chr> "Total Electric Power Industry", "Total E…
## $ `ENERGY SOURCE`              <chr> "Coal", "Petroleum", "Natural Gas", "Hydr…
## $ `GENERATION (Megawatthours)` <dbl> 46903, 71085, 367521, 104549, 87, 590145,…
# Rename the columns to short snake_case names and drop the unnamed row-index
# column. Columns are picked by their original names rather than by position,
# so a change in the file's column order cannot silently mislabel the data,
# and anything not listed (the index column) is dropped.
# `Date` is the first day of each year/month pair, for time-series plots.
energy <- energy %>%
  select(
    year = YEAR,
    month = MONTH,
    state = STATE,
    producer = `TYPE OF PRODUCER`,
    source = `ENERGY SOURCE`,
    generation = `GENERATION (Megawatthours)`
  ) %>%
  mutate(
    Date = as.Date(paste(year, month, 1, sep = "-"), format = "%Y-%m-%d")
  )
energy
## # A tibble: 496,774 × 7
##     year month state producer                          source gener…¹ Date      
##    <dbl> <dbl> <chr> <chr>                             <chr>    <dbl> <date>    
##  1  2001     1 AK    Total Electric Power Industry     Coal     46903 2001-01-01
##  2  2001     1 AK    Total Electric Power Industry     Petro…   71085 2001-01-01
##  3  2001     1 AK    Total Electric Power Industry     Natur…  367521 2001-01-01
##  4  2001     1 AK    Total Electric Power Industry     Hydro…  104549 2001-01-01
##  5  2001     1 AK    Total Electric Power Industry     Wind        87 2001-01-01
##  6  2001     1 AK    Total Electric Power Industry     Total   590145 2001-01-01
##  7  2001     1 AK    Electric Generators, Electric Ut… Coal     18410 2001-01-01
##  8  2001     1 AK    Electric Generators, Electric Ut… Petro…   64883 2001-01-01
##  9  2001     1 AK    Electric Generators, Electric Ut… Natur…  305277 2001-01-01
## 10  2001     1 AK    Electric Generators, Electric Ut… Hydro…  104549 2001-01-01
## # … with 496,764 more rows, and abbreviated variable name ¹​generation

US total power generation per year

# Scatter of the "US-TOTAL" rows for the whole electric power industry,
# coloured by energy source, with a smoothed trend line per source.
energy %>%
  filter(state == "US-TOTAL", producer == "Total Electric Power Industry") %>%
  ggplot(mapping = aes(x = year, y = generation, color = source)) +
  geom_point() +
  geom_smooth() +
  ggtitle("US total power generation per year")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Total power generation per state

# One point per industry-wide row for every individual state (the "US-TOTAL"
# rows are left out), coloured by energy source.
energy %>%
  filter(producer == "Total Electric Power Industry") %>%
  filter(state != "US-TOTAL") %>%
  ggplot(aes(x = state, y = generation, colour = source)) +
  geom_point() +
  ggtitle("Total power generation per state")

Total Power Generation Per Year

# Yearly generation per energy source, summed over the individual states and
# divided by one million (MWh -> TWh), drawn as one facet per source.
# The "Total" source and the year 2022 are excluded.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state != "US-TOTAL",
    source != "Total",
    year != 2022
  ) %>%
  group_by(source, year) %>%
  summarize(total_generation = sum(generation) / 1000000) %>%
  ggplot(aes(x = year, y = total_generation)) +
  geom_col() +
  geom_smooth() +
  facet_wrap(vars(source)) +
  labs(
    title = "Total Power Generation Per Year",
    x = "Year",
    y = "Power Generation(TWH)"
  )
## `summarise()` has grouped output by 'source'. You can override using the
## `.groups` argument.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Total Solar Power Generation Per Year

# Yearly solar generation summed over the individual states, in TWh
# (MWh divided by one million); 2022 is excluded.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state != "US-TOTAL",
    source == "Solar Thermal and Photovoltaic",
    year != 2022
  ) %>%
  group_by(year) %>%
  summarize(total_generation = sum(generation) / 1000000) %>%
  ggplot(aes(x = year, y = total_generation)) +
  geom_col() +
  geom_smooth() +
  labs(
    title = "Total Solar Power Generation Per Year",
    x = "Year",
    y = "Power Generation(TWH)"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Total Wind Power Generation Per Year

# Yearly wind generation summed over the individual states, in TWh
# (MWh divided by one million); 2022 is excluded.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state != "US-TOTAL",
    source == "Wind",
    year != 2022
  ) %>%
  group_by(year) %>%
  summarize(total_generation = sum(generation) / 1000000) %>%
  ggplot(aes(x = year, y = total_generation)) +
  geom_col() +
  geom_smooth() +
  labs(
    title = "Total Wind Power Generation Per Year",
    x = "Year",
    y = "Power Generation(TWH)"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Total Petroleum Power Generation Per Year

# Yearly petroleum generation summed over the individual states, in TWh
# (MWh divided by one million); 2022 is excluded.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state != "US-TOTAL",
    source == "Petroleum",
    year != 2022
  ) %>%
  group_by(year) %>%
  summarize(total_generation = sum(generation) / 1000000) %>%
  ggplot(aes(x = year, y = total_generation)) +
  geom_col() +
  geom_smooth() +
  labs(
    title = "Total Petroleum Power Generation Per Year",
    x = "Year",
    y = "Power Generation(TWH)"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Total Power Generation Over Time

# Monthly generation for solar, petroleum and wind, summed over the individual
# states and converted from MWh to TWh, plotted as a line plus a smoothed
# trend per source. 2022 is excluded.
#
# The source filter uses `%in%`: comparing with `==` against a three-element
# vector recycles that vector along the column, so a row is kept only when its
# source happens to line up with the recycled position and most matching rows
# are silently dropped (R only emits a length-mismatch warning).
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state != "US-TOTAL",
    year != 2022,
    source %in% c("Solar Thermal and Photovoltaic", "Petroleum", "Wind")
  ) %>%
  group_by(Date, source) %>%
  # Drop the grouping once the totals are computed; nothing downstream needs it.
  summarize(
    total_generation = sum(generation) / 1000000,
    .groups = "drop"
  ) %>%
  ggplot(aes(x = Date, y = total_generation, color = source)) +
  geom_line() +
  geom_smooth() +
  labs(
    title = "Total Power Generation Over Time",
    x = "Date",
    y = "Power Generation(TWH)"
  )
## Warning in source == c("Solar Thermal and Photovoltaic", "Petroleum", "Wind"):
## longer object length is not a multiple of shorter object length
## `summarise()` has grouped output by 'Date'. You can override using the
## `.groups` argument.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Accumulative Power Generation Over Time (2001-2022)

# Horizontal bars of solar generation per state: geom_col() stacks every
# matching row of a state, so each bar is the sum over all rows in the data.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state != "US-TOTAL",
    source == "Solar Thermal and Photovoltaic"
  ) %>%
  ggplot(aes(x = state, y = generation)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Accumulative Power Generation Over Time (2001-2022)",
    x = "States",
    y = "Generation (MWH)"
  )

U.S. Total Power Generated

# Yearly U.S. generation, one facet per energy source.
#
# Only the "Total Electric Power Industry" / "US-TOTAL" rows are plotted. The
# table also holds a row per producer type and per state, so stacking every
# row with geom_col() would count the same generation several times.
# NOTE(review): assumes the "US-TOTAL" rows are the national aggregate of the
# per-state rows, as the other sections of this analysis treat them -- confirm.
energy %>%
  filter(producer == "Total Electric Power Industry", state == "US-TOTAL") %>%
  ggplot(aes(x = year, y = generation, fill = source)) +
  geom_col() +
  facet_wrap(vars(source)) +
  labs(
    title = "U.S. Total Power Generated",
    x = "Year(2001-2022)",
    y = "Generation(MWH)"
  )

States with negative power generation (<-5000)

# Industry-wide state rows whose reported generation is below -5000,
# plotted per state and coloured by energy source.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state != "US-TOTAL",
    generation < -5000
  ) %>%
  ggplot(aes(x = state, y = generation, color = source)) +
  geom_point() +
  labs(
    title = "States with negative power generation (<-5000)",
    x = "State",
    y = "Generation(MWH)"
  )

The US goal is to install an average of 30 GW of solar capacity per year between now and 2025, and 60 GW per year from 2025 to 2030. Source: https://www.renewable-ei.org/pdfdownload/activities/01_Key_AlejandroMoreno.pdf

Texas total power generation per year

# Texas rows for the whole electric power industry: generation against year,
# coloured by energy source, with a smoothed trend per source.
energy %>%
  filter(state == "TX", producer == "Total Electric Power Industry") %>%
  ggplot(aes(x = year, y = generation, color = source)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Texas total power generation per year",
    x = "Year(2001-2022)",
    y = "Generation(MWH)"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Texas total power generation per month

# Texas rows for the whole electric power industry: generation against the
# month number, coloured by energy source, with a smoothed trend per source.
energy %>%
  filter(state == "TX", producer == "Total Electric Power Industry") %>%
  ggplot(aes(x = month, y = generation, color = source)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Texas total power generation per month",
    x = "Month",
    y = "Generation(MWH)"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Texas Total Wind Power Generation Per Year

# Yearly wind generation in Texas, in TWh (MWh divided by one million);
# 2022 is excluded.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state == "TX",
    source == "Wind",
    year != 2022
  ) %>%
  group_by(year) %>%
  summarize(total_generation = sum(generation) / 1000000) %>%
  ggplot(aes(x = year, y = total_generation)) +
  geom_col() +
  geom_smooth() +
  labs(
    title = "Texas Total Wind Power Generation Per Year",
    x = "Year",
    y = "Power Generation(TWH)"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Texas Total Petroleum Generation Per Year

# Yearly petroleum generation in Texas, in TWh (MWh divided by one million);
# 2022 is excluded.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state == "TX",
    source == "Petroleum",
    year != 2022
  ) %>%
  group_by(year) %>%
  summarize(total_generation = sum(generation) / 1000000) %>%
  ggplot(aes(x = year, y = total_generation)) +
  geom_col() +
  geom_smooth() +
  labs(
    title = "Texas Total Petroleum Generation Per Year",
    x = "Year",
    y = "Power Generation(TWH)"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Texas Total Solar Power Generation Per Year

# Yearly solar generation in Texas, in TWh (MWh divided by one million);
# 2022 is excluded.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state == "TX",
    source == "Solar Thermal and Photovoltaic",
    year != 2022
  ) %>%
  group_by(year) %>%
  summarize(total_generation = sum(generation) / 1000000) %>%
  ggplot(aes(x = year, y = total_generation)) +
  geom_col() +
  geom_smooth() +
  labs(
    title = "Texas Total Solar Power Generation Per Year",
    x = "Year",
    y = "Power Generation(TWH)"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Total Power Generation Over Time

# Monthly Texas generation for solar, petroleum and wind, converted from MWh
# to TWh, plotted as a line plus a smoothed trend per source. 2022 is excluded.
#
# The source filter uses `%in%`: comparing with `==` against a three-element
# vector recycles that vector along the column, so a row is kept only when its
# source happens to line up with the recycled position and most matching rows
# are silently dropped (R only emits a length-mismatch warning).
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    state == "TX",
    year != 2022,
    source %in% c("Solar Thermal and Photovoltaic", "Petroleum", "Wind")
  ) %>%
  group_by(Date, source) %>%
  # Drop the grouping once the totals are computed; nothing downstream needs it.
  summarize(
    total_generation = sum(generation) / 1000000,
    .groups = "drop"
  ) %>%
  ggplot(aes(x = Date, y = total_generation, color = source)) +
  geom_line() +
  geom_smooth() +
  labs(
    title = "Total Power Generation Over Time",
    x = "Date",
    y = "Power Generation(TWH)"
  )
## Warning in source == c("Solar Thermal and Photovoltaic", "Petroleum", "Wind"):
## longer object length is not a multiple of shorter object length
## `summarise()` has grouped output by 'Date'. You can override using the
## `.groups` argument.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'